Read data

df <- read_excel("wuhan_blood_sample_data_Jan_Feb_2020.xlsx")

Clean dataset

patients_df <- df %>% group_by(`Admission time`, `Discharge time`, gender, age, outcome) %>%
summarise(PATIENT_ID = sum(PATIENT_ID, na.rm = TRUE), `Total records` = n()) 

patients_df <- patients_df %>%
    mutate(`Days in hospital` = ceiling(difftime(`Discharge time`, `Admission time`, units = "days")))

df <- full_join(patients_df %>% ungroup() %>% select(`Admission time`, PATIENT_ID), df %>% select(-PATIENT_ID), by="Admission time")

df$gender<-ifelse(df$gender==1, 'Male', 'Female') 
df <- df %>% mutate(gender = as.factor(gender))

patients_df$gender<-ifelse(patients_df$gender==1, 'Male', 'Female') 
patients_df <- patients_df %>% mutate(gender = as.factor(gender))

df$outcome<-ifelse(df$outcome==1, 'Death', 'Survival')
df <- df %>% mutate(outcome = as.factor(outcome))

patients_df$outcome<-ifelse(patients_df$outcome==1, 'Death', 'Survival')
patients_df <- patients_df %>% mutate(outcome = as.factor(outcome))

patients_df <- df %>% group_by(PATIENT_ID, outcome, `Admission time`, `Discharge time`, gender, age) %>% summarise(missing_test=is.na(RE_DATE)) %>%
  group_by(PATIENT_ID, outcome, `Admission time`, `Discharge time`, gender, age) %>%
  summarise(test_provided = ifelse(missing_test, 0, 1)) %>%
  group_by(PATIENT_ID, outcome, `Admission time`, `Discharge time`, gender, age) %>%
  summarise(`Total blood tests`=sum(test_provided)) %>%
  mutate(`Days in hospital` = ceiling(difftime(`Discharge time`, `Admission time`, units = "days")))

last_test_df <- df %>% 
  group_by(PATIENT_ID, outcome, gender, age) %>% 
  fill_(names(df)) %>% 
  fill_(names(df), "up") %>% 
  summarise_at(vars(`Hypersensitive cardiac troponinI`:creatinine), function(x) last(x,order_by = is.na(x)))


cleaned_df <- last_test_df %>% ungroup() %>% select(PATIENT_ID, outcome, age, gender, hemoglobin, `eosinophils(%)`, `Alkaline phosphatase`, albumin, `basophil(%)`, `Total bilirubin`, `Platelet count`, `monocytes(%)`, `neutrophils(%)`, `total protein`, `mean corpuscular volume`, hematocrit, `White blood cell count`, `mean corpuscular hemoglobin concentration`, Urea, `lymphocyte count`, `Red blood cell count`, `Eosinophil count`, `neutrophils count`, `Direct bilirubin`, `(%)lymphocyte`, `Total cholesterol`, `aspartate aminotransferase`, `Uric acid`, `HCO3-`, `Lactate dehydrogenase`, `monocytes count`, globulin, `γ-glutamyl transpeptidase`, `basophil count(#)`, `mean corpuscular hemoglobin`, `glutamic-pyruvic transaminase`, eGFR, creatinine) %>% filter_all(function(x) !is.na(x)) 

Patients summary

patients_summary <- patients_df %>% ungroup() %>% select(-c(age, PATIENT_ID))

tbl_summary(
    patients_summary,
    by = outcome,
    label = gender ~ "Gender",
) %>%
    add_n() %>%
    modify_header(label = "") %>%
    add_overall() %>%
    bold_labels() 
Overall, N = 3751 N Death, N = 1741 Survival, N = 2011
Gender 375
Female 151 (40%) 48 (28%) 103 (51%)
Male 224 (60%) 126 (72%) 98 (49%)
Total blood tests 16 (9, 21) 375 14 (7, 24) 16 (12, 20)
Days in hospital 10 (5, 16) 375 6 (3, 10) 14 (10, 18)

1 Statistics presented: n (%); Median (IQR)

 ggplot(patients_df, aes(x=age,fill=gender)) + geom_histogram(binwidth = 1) + facet_grid(. ~ gender) + scale_x_continuous(name="Age", limits=c(min(df$age), max(df$age)), breaks = seq(0, 100, by=10)) + scale_y_continuous(name = "Number of patients", limits = c(0,10), breaks = seq(0,10, by=1)) +
    theme_minimal()

Patients grouped by outcome, age and gender

 ggplot(patients_df, aes(x=age,fill=outcome)) + geom_histogram(binwidth = 1) + facet_grid(outcome ~ gender) + scale_x_continuous(name="Age", limits=c(min(df$age), max(df$age)), breaks = seq(0, 100, by=10)) + scale_y_continuous(name = "Number of patients", limits = c(0,9), breaks = seq(0,9, by=1)) +
    theme_minimal()

Patients grouped by outcome and hospitalization duration

ggplot(patients_df, aes(x=`Days in hospital`, fill=outcome)) + geom_histogram(binwidth = 1) + facet_grid(. ~ outcome)  + ylab("Number of patients") +
    theme_minimal()

Patients grouped by outcome and total blood tests taken

ggplot(patients_df, aes(x=`Total blood tests`, fill=outcome)) + geom_histogram(binwidth = 1) + facet_grid(. ~ outcome)  + ylab("Number of patients") +
  scale_x_continuous(name="Total blood tests", limits=c(0, 60), breaks = seq(0, 60, by=10)) +
   scale_y_continuous(name = "Number of patients", limits = c(0,20), breaks = seq(0,20, by=2)) +
    theme_minimal()

Removed data

patients_df %>% filter(`Total blood tests`==0) 
PATIENT_ID outcome Admission time Discharge time gender age Total blood tests Days in hospital
187 Survival 2020-02-17 18:56:09 2020-02-20 20:55:31 Male 44 0 4 days
189 Survival 2020-02-10 04:37:30 2020-02-10 13:54:23 Male 61 0 1 days
192 Survival 2020-02-16 17:14:30 2020-02-16 21:05:17 Male 34 0 1 days
197 Survival 2020-02-10 05:01:15 2020-02-11 15:40:41 Male 67 0 2 days
200 Survival 2020-02-16 04:41:21 2020-02-16 15:26:13 Male 25 0 1 days
201 Survival 2020-02-17 21:30:07 2020-02-20 13:05:11 Male 39 0 3 days
253 Death 2020-02-13 21:05:54 2020-02-14 11:00:05 Male 51 0 1 days
268 Death 2020-02-14 11:46:36 2020-02-15 10:15:28 Male 69 0 1 days
285 Death 2020-01-31 23:20:40 2020-02-01 03:16:34 Male 63 0 1 days
289 Death 2020-02-01 02:12:05 2020-02-01 10:54:57 Male 63 0 1 days
311 Death 2020-02-11 23:45:15 2020-02-15 09:02:41 Female 77 0 4 days
347 Death 2020-02-11 22:25:20 2020-02-15 10:03:32 Female 80 0 4 days
354 Death 2020-02-03 21:22:41 2020-02-04 01:03:11 Male 57 0 1 days
359 Death 2020-02-11 01:42:48 2020-02-14 09:38:13 Male 65 0 4 days

Biomarkers

no_dates_df <- df %>% select(-c('Admission time', 'Discharge time', 'RE_DATE', 'PATIENT_ID', 'age', 'gender' ))
tbl_summary(
    no_dates_df,
    by = outcome,
    missing = "no"
            ) %>%
    add_n() %>%
    modify_header(label = "**Biomarker**") %>%
  modify_spanning_header(c("stat_1", "stat_2") ~ "**Final patient outcome related to the test**") %>%
    bold_labels() 
Biomarker N Final patient outcome related to the test
Death, N = 2,9051 Survival, N = 3,2151
Hypersensitive cardiac troponinI 507 70 (18, 631) 3 (2, 7)
hemoglobin 975 123 (110, 135) 127 (116, 138)
Serum chloride 975 104 (100, 111) 101 (99, 103)
Prothrombin time 662 16.3 (15.0, 18.2) 13.6 (13.1, 14.1)
procalcitonin 459 0.38 (0.14, 1.13) 0.04 (0.02, 0.06)
eosinophils(%) 957 0.00 (0.00, 0.10) 0.70 (0.00, 1.80)
Interleukin 2 receptor 268 1,180 (807, 1,603) 529 (400, 742)
Alkaline phosphatase 930 83 (64, 123) 60 (50, 75)
albumin 934 28 (24, 31) 36 (34, 39)
basophil(%) 957 0.10 (0.10, 0.20) 0.20 (0.10, 0.40)
Interleukin 10 267 11 (6, 17) 5 (5, 8)
Total bilirubin 930 14 (10, 25) 8 (6, 12)
Platelet count 957 112 (55, 174) 229 (176, 290)
monocytes(%) 958 3.0 (2.0, 4.7) 8.2 (6.3, 10.0)
antithrombin 330 80 (70, 92) 93 (86, 103)
Interleukin 8 268 30 (18, 61) 11 (7, 19)
indirect bilirubin 906 6.2 (4.2, 9.2) 4.9 (3.4, 7.1)
Red blood cell distribution width 923 13.20 (12.40, 14.40) 12.20 (11.80, 12.80)
neutrophils(%) 957 92 (88, 95) 66 (56, 76)
total protein 931 62 (57, 68) 68 (65, 72)
Quantification of Treponema pallidum antibodies 279 0.06 (0.04, 0.07) 0.05 (0.04, 0.07)
Prothrombin activity 659 66 (56, 78) 94 (88, 103)
HBsAg 279 0.01 (0.00, 0.02) 0.00 (0.00, 0.01)
mean corpuscular volume 957 91.3 (87.1, 96.4) 89.8 (86.8, 91.9)
hematocrit 957 35.9 (32.5, 39.8) 37.1 (34.3, 39.9)
White blood cell count 1,127 12 (8, 17) 6 (4, 8)
Tumor necrosis factorα 268 11 (8, 17) 8 (6, 10)
mean corpuscular hemoglobin concentration 957 342 (331, 350) 343 (335, 350)
fibrinogen 566 3.92 (2.44, 5.63) 4.40 (3.56, 5.34)
Interleukin 1β 268 5.0 (5.0, 5.0) 5.0 (5.0, 5.0)
Urea 936 11 (7, 17) 4 (3, 5)
lymphocyte count 957 0.46 (0.31, 0.69) 1.25 (0.87, 1.62)
PH value 384 6.50 (6.00, 7.41) 6.50 (6.00, 7.00)
Red blood cell count 1,127 4.0 (3.6, 4.6) 4.2 (3.8, 4.7)
Eosinophil count 957 0.00 (0.00, 0.01) 0.03 (0.00, 0.09)
Corrected calcium 914 2.35 (2.27, 2.44) 2.37 (2.27, 2.44)
Serum potassium 980 4.60 (4.04, 5.27) 4.28 (3.92, 4.62)
glucose 775 9.1 (6.9, 13.3) 5.7 (5.0, 7.6)
neutrophils count 957 10.8 (7.0, 15.2) 3.5 (2.4, 5.2)
Direct bilirubin 930 8 (5, 14) 4 (2, 5)
Mean platelet volume 862 11.30 (10.70, 12.20) 10.40 (9.90, 11.00)
ferritin 283 1,636 (928, 2,517) 504 (235, 834)
RBC distribution width SD 923 43.7 (39.9, 48.5) 39.5 (37.6, 41.4)
Thrombin time 566 17.30 (15.80, 19.75) 16.40 (15.60, 17.30)
(%)lymphocyte 958 4 (2, 7) 24 (16, 33)
HCV antibody quantification 279 0.07 (0.04, 0.11) 0.06 (0.04, 0.08)
D-D dimer 630 19 (3, 21) 1 (0, 1)
Total cholesterol 931 3.32 (2.72, 3.88) 3.93 (3.39, 4.48)
aspartate aminotransferase 935 38 (25, 59) 21 (17, 29)
Uric acid 934 245 (166, 374) 240 (193, 304)
HCO3- 934 21.8 (18.8, 24.7) 24.7 (22.8, 26.7)
calcium 979 2.00 (1.90, 2.08) 2.17 (2.10, 2.25)
Amino-terminal brain natriuretic peptide precursor(NT-proBNP) 475 1,467 (516, 4,578) 64 (23, 166)
Lactate dehydrogenase 934 593 (431, 840) 220 (189, 278)
platelet large cell ratio 862 35 (30, 42) 28 (23, 33)
Interleukin 6 272 66 (30, 142) 8 (2, 21)
Fibrin degradation products 330 114 (18, 150) 4 (4, 4)
monocytes count 957 0.36 (0.20, 0.58) 0.43 (0.32, 0.58)
PLT distribution width 862 13.60 (12.10, 15.93) 11.70 (10.70, 13.00)
globulin 930 34.1 (30.2, 38.2) 31.8 (29.5, 35.2)
γ-glutamyl transpeptidase 930 42 (27, 79) 29 (19, 46)
International standard ratio 659 1.31 (1.17, 1.48) 1.04 (0.99, 1.09)
basophil count(#) 957 0.010 (0.010, 0.030) 0.010 (0.010, 0.020)
2019-nCoV nucleic acid detection 501
-1 57 (100%) 444 (100%)
mean corpuscular hemoglobin 957 31.20 (29.90, 32.70) 30.70 (29.60, 31.90)
Activation of partial thromboplastin time 568 40 (36, 45) 39 (35, 43)
High sensitivity C-reactive protein 737 114 (65, 191) 7 (2, 35)
HIV antibody quantification 278 0.08 (0.07, 0.11) 0.09 (0.08, 0.11)
serum sodium 975 142 (138, 148) 140 (138, 141)
thrombocytocrit 862 0.15 (0.10, 0.21) 0.24 (0.19, 0.30)
ESR 383 36 (16, 59) 26 (13, 40)
glutamic-pyruvic transaminase 931 26 (18, 44) 21 (15, 36)
eGFR 936 72 (43, 91) 100 (85, 114)
creatinine 936 88 (68, 130) 64 (54, 83)

1 Statistics presented: Median (IQR); n (%)

Finding correlation

Finding correlation between outcome and other variables. Ignoring Admission time, PATIENT_ID, RE_DATE and Discharge time, and dropping outcome, which correlation with itself is equal to 1.0.

cor_df <-df

cor_df$outcome<-ifelse(cor_df$outcome=='Death', 1, 0)
cor_df$gender<-ifelse(cor_df$gender=='Male', 1, 0)

cor_df <- cor_df %>% rename(isMale=gender)
cor_df <- cor_df %>% rename(Death=outcome)

cor_df <- cor_df %>% select (-c(`Admission time`, PATIENT_ID, RE_DATE, `Discharge time` ))

corrMatrix <- cor(cor_df[sapply(cor_df, is.numeric)], use='pairwise.complete.obs')

correlation_df <- as.data.frame(corrMatrix)

correlation_df %>% rownames_to_column('variable') %>% filter(variable != 'Death') %>% select(variable, Death) %>% mutate(Death = abs(Death)) %>%
  arrange(desc(Death)) %>%
  rename(`Outcome correlation` = Death) %>%
  head(10) 
variable Outcome correlation
neutrophils(%) 0.7258383
(%)lymphocyte 0.7230203
albumin 0.6892432
Prothrombin activity 0.6562405
High sensitivity C-reactive protein 0.6524696
D-D dimer 0.6368244
Lactate dehydrogenase 0.6230357
neutrophils count 0.6071749
Fibrin degradation products 0.6044110
age 0.5860651

Use of correlation

In following section, we will use 3 biomarkers that are most correlated to outcome and visualize theirs mean values among each patient at 3D graph. So we take only these patients who had all of these 3 biomarkers tested at least once. If somebody was tested more than once, then mean value of all tests for each biomarker is calculated.

#TODO: Take for correlation cleaned_df and check if it's consistent with predictive model

new_df  <- df

new_df <- new_df %>% select(PATIENT_ID, `neutrophils(%)`, `(%)lymphocyte`, albumin, outcome) %>%
    group_by(PATIENT_ID, outcome) %>%
    summarise(Neutrophils_mean = mean(`neutrophils(%)`, na.rm = TRUE), Lymphocyte_mean = mean(`(%)lymphocyte`, na.rm = TRUE), albumin_mean = mean(`albumin`, na.rm = TRUE))

new_df <- new_df %>% filter(!is.nan(Neutrophils_mean) & !is.nan(Lymphocyte_mean) & !is.nan(albumin_mean))


mycolors <- c('royalblue1', 'darkcyan')
new_df$color <- mycolors[ as.numeric(new_df$outcome) ]

par(mar=c(0,0,0,0))
plot3d( 
    x=new_df$Neutrophils_mean, y=new_df$Lymphocyte_mean, z=new_df$albumin_mean, 
    col = new_df$color, 
    type = 's', 
    radius = 1,
    legend=TRUE,
    xlab="Neutrophils(%)", ylab="Lymphocyte(%)", zlab="Albumin")
legend3d("topright", legend = c('Death', 'Survival'), pch = 10, col = mycolors, cex=0.8, inset=c(0.02))

writeWebGL( filename="3d_correlation_mean.html" ,  width=600, height=600)

Data after cleaning and transformation:

tbl_summary(
    new_df %>% ungroup() %>%rename(`Neutrophils(%)` = Neutrophils_mean, `Lymphocyte(%)` = Lymphocyte_mean, `Albumin`= albumin_mean) %>% select(-PATIENT_ID, -color),
    by = outcome
) %>%
    add_n() %>%
    modify_header(label = "**Mean value of**") %>%
    add_overall() %>%
    bold_labels() 
Mean value of Overall, N = 3541 N Death, N = 1621 Survival, N = 1921
Neutrophils(%) 77 (63, 91) 354 91 (87, 94) 66 (56, 72)
Lymphocyte(%) 15 (5, 26) 354 5 (3, 8) 25 (18, 31)
Albumin 33.1 (29.1, 37.0) 354 28.9 (26.3, 31.2) 36.5 (33.9, 39.4)

1 Statistics presented: Median (IQR)

htmltools::includeHTML("./3d_correlation_mean.html")
RGL model

You must enable Javascript to view this page properly.


Drag mouse to rotate model. Use mouse wheel or middle button to zoom it.

Object written from rgl 0.100.54 by writeWebGL.

Common biomarkers’ change in time

biomarker_popularity <- df %>% group_by(PATIENT_ID, outcome) %>% summarise_at(vars(`Hypersensitive cardiac troponinI`:creatinine), function(x) sum(!is.na(x)))
biomarker_popularity$sum_of_single_test <- rowSums(biomarker_popularity %>% ungroup() %>% select(-PATIENT_ID, -outcome))

most_tested_death <- biomarker_popularity %>% select(sum_of_single_test, outcome, `neutrophils(%)`,  `(%)lymphocyte`, albumin) %>% filter(outcome=='Death') %>% arrange(desc(sum_of_single_test)) %>% head(10)
most_tested_survival <- biomarker_popularity %>% select(sum_of_single_test, outcome, `neutrophils(%)`,  `(%)lymphocyte`, albumin) %>% filter(outcome=='Survival') %>% arrange(desc(sum_of_single_test)) %>% head(10)

foo <- rbind(most_tested_death, most_tested_survival)

patients_tests <- merge(foo %>% ungroup() %>% select(PATIENT_ID), df %>% ungroup() %>% select(PATIENT_ID, RE_DATE,`neutrophils(%)`,  `(%)lymphocyte`, albumin, outcome),  by="PATIENT_ID") %>% filter(!is.na(`neutrophils(%)`) |  !is.na(`(%)lymphocyte`) | !is.na(albumin))

neutrophils_seq <- patients_tests %>% filter(!is.na(`neutrophils(%)`)) 

neutrophils_seq %>% mutate(PATIENT_ID=as.factor(PATIENT_ID)) %>%
  ggplot( aes(x=RE_DATE, y=`neutrophils(%)`, group=PATIENT_ID, color=PATIENT_ID)) +
    geom_line() +
    geom_point() +
  facet_grid(rows=vars(outcome)) +
    ggtitle("Neutrophils (%) during patient hospitalization") +
    theme_ipsum() +
    ylab("Neutrophils (%)") +
    transition_reveal(RE_DATE)

#anim_save("neutrophils_seq.gif")

lymphocyte_seq <- patients_tests %>% filter(!is.na(`(%)lymphocyte`)) 

lymphocyte_seq %>% mutate(PATIENT_ID=as.factor(PATIENT_ID)) %>%
  ggplot( aes(x=RE_DATE, y=`(%)lymphocyte`, group=PATIENT_ID, color=PATIENT_ID)) +
    geom_line() +
    geom_point() +
  facet_grid(rows=vars(outcome)) +
    ggtitle("Lymphocyte (%) during patient hospitalization") +
    theme_ipsum() +
    ylab("Lymphocyte (%)") +
    transition_reveal(RE_DATE)

#anim_save("lymphocyte_seq.gif")

albumin_seq <- patients_tests %>% filter(!is.na(albumin)) 

albumin_seq %>% mutate(PATIENT_ID=as.factor(PATIENT_ID)) %>%
  ggplot( aes(x=RE_DATE, y=albumin, group=PATIENT_ID, color=PATIENT_ID)) +
    geom_line() +
    geom_point() +
  facet_grid(rows=vars(outcome)) +
    ggtitle("Albumin value during patient hospitalization") +
    theme_ipsum() +
    ylab("Albumin") +
    transition_reveal(RE_DATE)

#anim_save("albumin_seq.gif")

Prediction model

In order to build an appropriate model, there is a need for the data cleaning. There shouldn’t be any NA in the data set. Moreover, every considered patient should have all of considered biomarkers tested at least once. For building the model purpose, the last existing test of each biomarker for each patient was taken. As it was shown before, many biomarkers are unsuitable because of containing too many NA in theirs columns. Which means that they should be removed from the data set. Below is the list of remaining columns:

  • outcome
  • age
  • gender
  • hemoglobin
  • eosinophils(%)
  • Alkaline phosphatas
  • albumin
  • basophil(%)
  • Total bilirubin
  • Platelet count
  • monocytes(%)
  • neutrophils(%)
  • total protein
  • mean corpuscular volume
  • hematocrit
  • White blood cell count
  • mean corpuscular hemoglobin concentration
  • Urea
  • lymphocyte count
  • Red blood cell count
  • Eosinophil count
  • neutrophils count
  • Direct bilirubin
  • (%)lymphocyte
  • Total cholesterol
  • aspartate aminotransferase
  • Uric acid
  • HCO3-
  • Lactate dehydrogenase
  • monocytes count
  • globulin
  • γ-glutamyl transpeptidase
  • basophil count(#)
  • mean corpuscular hemoglobin
  • glutamic-pyruvic transaminase
  • eGFR
  • creatinine

With condition of existing at least one of listed above biomarkers tests, 19 patients should be ignored in building model process.

cleaned_df <- cleaned_df %>% select(-PATIENT_ID)
set.seed(23)
inTraining <- 
    createDataPartition(
        y = cleaned_df$outcome,
        p = .70,
        list = FALSE)


training <- cleaned_df[ inTraining,]
testing  <- cleaned_df[-inTraining,]

rfGrid <- expand.grid(mtry = 10:30)
ctrl <- trainControl(
    method = "repeatedcv",
    classProbs = TRUE,
    number = 2,
    repeats = 5)

set.seed(23)
fit <- train(outcome ~ .,
             data = training,
             method = "rf",
             metric = "ROC",
             preProc = c("center", "scale"),
             trControl = ctrl,
             tuneGrid = rfGrid,
             ntree = 30)
rfClasses <- predict(fit, newdata = testing)
confusionMatrix(data = rfClasses, testing$outcome)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Death Survival
##   Death       48        1
##   Survival     0       56
##                                           
##                Accuracy : 0.9905          
##                  95% CI : (0.9481, 0.9998)
##     No Information Rate : 0.5429          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9808          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.9825          
##          Pos Pred Value : 0.9796          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.4571          
##          Detection Rate : 0.4571          
##    Detection Prevalence : 0.4667          
##       Balanced Accuracy : 0.9912          
##                                           
##        'Positive' Class : Death           
## 

Imporance of attributes

varImp(fit)
## rf variable importance
## 
##   only 20 most important variables shown (out of 36)
## 
##                              Overall
## `Lactate dehydrogenase`      100.000
## `(%)lymphocyte`               56.749
## `neutrophils(%)`              31.971
## `Eosinophil count`            17.594
## albumin                       15.974
## `neutrophils count`           14.715
## `monocytes(%)`                14.162
## `Platelet count`              13.067
## `lymphocyte count`            10.040
## `aspartate aminotransferase`   9.925
## `eosinophils(%)`               6.399
## age                            3.518
## `Direct bilirubin`             3.514
## `Total cholesterol`            3.081
## `HCO3-`                        2.540
## Urea                           2.374
## eGFR                           2.171
## `γ-glutamyl transpeptidase`    1.967
## `Alkaline phosphatase`         1.607
## `total protein`                1.047